In [107]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(color_codes=True)
In [250]:
# Load the tab-separated per-minute in/out passenger counts and preview the first rows.
data = pd.read_csv(
    '../data/belorusskoye_in_out_1_min_2016_04.txt',
    sep='\t',
)
data.head()
Out[250]:
In [ ]:
Данные за рабочую среду апреля
In [98]:
In [251]:
# Rows whose week_day column equals "wed".
data_wed = data.query('week_day == "wed"')

# Distinct station names as a concrete list (order of first appearance).
# A list, unlike a `map` object on Python 3, can be iterated repeatedly by the
# cells below; `.unique()` also avoids the removed `Series.as_matrix()`.
stations = data['station_name'].unique().tolist()
In [253]:
# Zero-filled accumulators with 1440 slots each
# (presumably one slot per minute of a day: 24 * 60 -- TODO confirm).
pass_in = [0] * 1440
pass_out = [0] * 1440
In [254]:
# Render the station names as a one-column table for inspection.
pd.DataFrame(data=stations)
Out[254]:
In [255]:
# Keep only the rows of the "МОСКВА БЕЛОРУССКАЯ" station; wrapping the query
# result in a new DataFrame gives an object that later cells add columns to.
belorus = pd.DataFrame(
    data.query('station_name == "МОСКВА БЕЛОРУССКАЯ"')
)
In [256]:
# Minute-of-day index computed from the hour and minute columns.
belorus['time'] = belorus['hour'] * 60 + belorus['minute']
In [257]:
# Length of the smoothed inbound series (should match the time slice below).
# NOTE(review): `window` and `N` are defined in a later cell, so this cell
# fails on a fresh kernel until that cell has been run.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0;
# the former `.transpose()` was a no-op on a 1-D array.
window(belorus['passengers_in'].values).shape
Out[257]:
In [258]:
# Length of the time axis once the first N - 1 rows are dropped positionally.
belorus['time'].iloc[N - 1:].shape
Out[258]:
In [269]:
# Default number of consecutive samples averaged by `window`.
N = 30


def window(x, n=None):
    """Return the moving average of ``x`` over ``n`` consecutive samples.

    Each output element is the mean of ``n`` neighbouring inputs, so the
    result has ``len(x) - n + 1`` elements ('valid' convolution with a
    uniform kernel).

    Parameters
    ----------
    x : 1-D array-like of numbers
        Samples to smooth.
    n : int, optional
        Window length; defaults to the module-level ``N``.

    Returns
    -------
    numpy.ndarray
        The smoothed samples; empty when ``x`` has fewer than ``n`` samples.

    Raises
    ------
    ValueError
        If the window length is smaller than 1.
    """
    size = N if n is None else int(n)
    if size < 1:
        raise ValueError('window length must be at least 1')
    samples = np.asarray(x, dtype=float)
    # With fewer samples than the window, np.convolve(mode='valid') swaps the
    # roles of its operands and returns a meaningless array (and raises on
    # empty input), so short series yield an empty result instead.
    if samples.size < size:
        return np.empty(0)
    return np.convolve(samples, np.ones(size) / size, mode='valid')
# Smoothed inbound (red) and outbound (blue) passenger counts for the
# Belorusskaya station against the minute-of-day index.
# The first N - 1 time values are dropped so x and y have equal length.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
belorus_time = belorus['time'].iloc[N - 1:]
plt.plot(belorus_time, window(belorus['passengers_in'].values), 'r-')
plt.plot(belorus_time, window(belorus['passenger_out'].values), 'b-')
plt.axis([0, 1440, 0, 600])
plt.show()
In [261]:
def in_out_dict(f):
    """Convert a frame of passenger counts into a list of plain records.

    Parameters
    ----------
    f : pandas.DataFrame
        Must contain the columns ``time``, ``passengers_in`` and
        ``passenger_out``.

    Returns
    -------
    list of dict
        One ``{'time': ..., 'in': ..., 'out': ...}`` dict per row, in row order.
    """
    # Series.tolist() yields native Python scalars, so the records can be
    # passed straight to json.dump; row-wise iterrows() may hand back numpy
    # scalars (not JSON serializable on Python 3) and upcasts mixed int/float
    # rows to float.
    times = f['time'].tolist()
    entered = f['passengers_in'].tolist()
    exited = f['passenger_out'].tolist()
    return [
        {'time': t, 'in': n_in, 'out': n_out}
        for t, n_in, n_out in zip(times, entered, exited)
    ]
In [262]:
from json import dumps, dump
import os
Сохранение данных
In [ ]:
In [265]:
# Export one JSON file per (station, week day, day type) combination found in
# `data`; each file holds the list of records produced by `in_out_dict`.
path = '../data/stations_in_out/'
# makedirs (unlike mkdir) also creates any missing parent directory.
if not os.path.exists(path):
    os.makedirs(path)

# groupby visits only the combinations that actually occur, in a single pass,
# instead of building and running one query string per possible combination
# (which also broke on station names containing a double quote).
for (station, day_type, week_day), group in data.groupby(['station_name', 'day_type', 'week_day']):
    # assign() returns a new frame, so the grouped slice is never mutated.
    station_data = group.assign(time=60 * group['hour'] + group['minute'])
    result = in_out_dict(station_data)
    if not result:
        continue
    file_name = 'data_{0}_{1}_{2}.json'.format(station, week_day, day_type)
    with open(os.path.join(path, file_name), 'w') as f:
        # Reuse the records computed above rather than rebuilding them.
        dump(result, f)
In [277]:
# Plot smoothed inbound (red) and outbound (blue) passenger counts for every
# (station, day type, week day) combination present in `data`.
for (station, day_type, week_day), group in data.groupby(['station_name', 'day_type', 'week_day']):
    # A moving average over N samples needs at least N rows; shorter groups
    # would give x and y series of different lengths, so they are skipped.
    if len(group) < N:
        continue
    station_data = group.assign(time=60 * group['hour'] + group['minute'])
    # Drop the first N - 1 time values so x matches the smoothed series length.
    smoothed_time = station_data['time'].iloc[N - 1:]
    # `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
    plt.plot(smoothed_time, window(station_data['passengers_in'].values), 'r-')
    plt.plot(smoothed_time, window(station_data['passenger_out'].values), 'b-')
    title = 'station_name == "{0}" and week_day == "{1}" and day_type == "{2}"'.format(station, week_day, day_type)
    # Byte strings (Python 2) are decoded for display; text strings are used as is.
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    print(title)
    plt.title(title)
    plt.show()
In [ ]:
In [ ]: